/*
 * Hunt - a framework for web and console application based on Collie using Dlang development
 *
 * Copyright (C) 2015-2017 Shanghai Putao Technology Co., Ltd
 *
 * Developer: HuntLabs
 *
 * Licensed under the Apache-2.0 License.
 *
 */

module hunt.utils.url;
/**
 * A URL handling library.
 *
 * URLs are Uniform Resource Locators. They consist of a scheme and a host, with some optional
 * elements like port, path, username, and password.
 *
 * This module aims to make it simple to muck about with them.
 *
 * Example usage:
 * ---
 * auto url = "ssh://me:password@192.168.0.8/".parseURL;
 * auto files = system("ssh", url.toString, "ls").splitLines;
 * foreach (file; files) {
 *     system("scp", url ~ file, ".");
 * }
 * ---
 *
 * License: The MIT license.
 */
// NOTE(review): the file header above says Apache-2.0 while this module comment says MIT;
// confirm which license applies.

import std.conv;
import std.string;
import std.exception;

// Attribute labels: every declaration that follows in this module is `pure` and `@safe`
// unless it says otherwise.
pure:
@safe:

/// An exception thrown when something bad happens with URLs.
class URLException : Exception
{
    // Supplies the standard Exception constructors (from std.exception).
    mixin basicExceptionCtors;
}

/**
 * A mapping from schemes to their default ports.
 *
 * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to
 * use even if they use ports. Entries here should be treated as best guesses.
 *
 * Keys are compared exactly as written, i.e. in lower case.
 */
enum ushort[string] schemeToDefaultPort = [
    "aaa": 3868,
    "aaas": 5658,
    "acap": 674,
    "amqp": 5672,
    "cap": 1026,
    "coap": 5683,
    "coaps": 5684,
    "dav": 443,
    "dict": 2628,
    "ftp": 21,
    "git": 9418,
    "go": 1096,
    "gopher": 70,
    "http": 80,
    "https": 443,
    "ws": 80,
    "wss": 443,
    "iac": 4569,
    "icap": 1344,
    "imap": 143,
    "ipp": 631,
    "ipps": 631, // yes, they're both mapped to port 631
    "irc": 6667, // De facto default port, not the IANA reserved port.
76 "ircs": 6697, 77 "iris": 702, // defaults to iris.beep 78 "iris.beep": 702, 79 "iris.lwz": 715, 80 "iris.xpc": 713, 81 "iris.xpcs": 714, 82 "jabber": 5222, // client-to-server 83 "ldap": 389, 84 "ldaps": 636, 85 "msrp": 2855, 86 "msrps": 2855, 87 "mtqp": 1038, 88 "mupdate": 3905, 89 "news": 119, 90 "nfs": 2049, 91 "pop": 110, 92 "redis": 6379, 93 "reload": 6084, 94 "rsync": 873, 95 "rtmfp": 1935, 96 "rtsp": 554, 97 "shttp": 80, 98 "sieve": 4190, 99 "sip": 5060, 100 "sips": 5061, 101 "smb": 445, 102 "smtp": 25, 103 "snews": 563, 104 "snmp": 161, 105 "soap.beep": 605, 106 "ssh": 22, 107 "stun": 3478, 108 "stuns": 5349, 109 "svn": 3690, 110 "teamspeak": 9987, 111 "telnet": 23, 112 "tftp": 69, 113 "tip": 3372, 114 "mysql": 3306, 115 "postgresql": 5432, 116 ]; 117 118 /** 119 * A Unique Resource Locator. 120 * 121 * URLs can be parsed (see parseURL) and implicitly convert to strings. 122 */ 123 struct URL 124 { 125 hash_t toHash() const @safe nothrow 126 { 127 return asTuple().toHash(); 128 } 129 130 pure: 131 /// The URL scheme. For instance, ssh, ftp, or https. 132 string scheme; 133 134 /// The username in this URL. Usually absent. If present, there will also be a password. 135 string user; 136 137 /// The password in this URL. Usually absent. 138 string pass; 139 140 /// The hostname. 141 string host; 142 143 string[string] queryArr; 144 145 /** 146 * The port. 147 * 148 * This is inferred from the scheme if it isn't present in the URL itself. 149 * If the scheme is not known and the port is not present, the port will be given as 0. 150 * For some schemes, port will not be sensible -- for instance, file or chrome-extension. 151 * 152 * If you explicitly need to detect whether the user provided a port, check the providedPort 153 * field. 
154 */ 155 @property ushort port() const nothrow 156 { 157 if (providedPort != 0) { 158 return providedPort; 159 } 160 if (auto p = scheme in schemeToDefaultPort) { 161 return *p; 162 } 163 return 0; 164 } 165 166 /** 167 * Set the port. 168 * 169 * This sets the providedPort field and is provided for convenience. 170 */ 171 @property ushort port(ushort value) nothrow 172 { 173 return providedPort = value; 174 } 175 176 /// The port that was explicitly provided in the URL. 177 ushort providedPort; 178 179 /** 180 * The path. 181 * 182 * For instance, in the URL https://cnn.com/news/story/17774?visited=false, the path is 183 * "/news/story/17774". 184 */ 185 string path; 186 187 /** 188 * The query parameters associated with this URL. 189 */ 190 string query; 191 192 /** 193 * The fragment. In web documents, this typically refers to an anchor element. 194 * For instance, in the URL https://cnn.com/news/story/17774#header2, the fragment is "header2". 195 */ 196 string fragment; 197 198 /** 199 * Convert this URL to a string. 200 * The string is properly formatted and usable for, eg, a web request. 201 */ 202 string toString() const 203 { 204 return toString(false); 205 } 206 207 /** 208 * Convert this URL to a string. 209 * 210 * The string is intended to be human-readable rather than machine-readable. 
211 */ 212 string toHumanReadableString() const 213 { 214 return toString(true); 215 } 216 217 /// 218 unittest 219 { 220 auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL; 221 assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString); 222 assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toString); 223 } 224 225 unittest 226 { 227 assert("http://example.org/some_path".parseURL.toHumanReadableString == 228 "http://example.org/some_path"); 229 } 230 231 private string toString(bool humanReadable) const 232 { 233 import std.array : Appender; 234 Appender!string s; 235 s ~= scheme; 236 s ~= "://"; 237 if (user) { 238 s ~= humanReadable ? user : user.percentEncode; 239 s ~= ":"; 240 s ~= humanReadable ? pass : pass.percentEncode; 241 s ~= "@"; 242 } 243 s ~= humanReadable ? host : host.toPuny; 244 if (providedPort) { 245 if ((scheme in schemeToDefaultPort) == null || schemeToDefaultPort[scheme] != providedPort) { 246 s ~= ":"; 247 s ~= providedPort.to!string; 248 } 249 } 250 string p = path; 251 if (p.length == 0 || p == "/") { 252 s ~= '/'; 253 } else { 254 if (humanReadable) { 255 s ~= p; 256 } else { 257 if (p[0] == '/') { 258 p = p[1..$]; 259 } 260 foreach (part; p.split('/')) { 261 s ~= '/'; 262 s ~= part.percentEncode; 263 } 264 } 265 } 266 if (query.length) { 267 s ~= '?'; 268 s ~= query; 269 } 270 if (fragment) { 271 s ~= '#'; 272 s ~= fragment.percentEncode; 273 } 274 return s.data; 275 } 276 277 /// Implicitly convert URLs to strings. 278 alias toString this; 279 280 /** 281 Compare two URLs. 282 283 I tried to make the comparison produce a sort order that seems natural, so it's not identical 284 to sorting based on .toString(). For instance, username/password have lower priority than 285 host. The scheme has higher priority than port but lower than host. 
286 287 While the output of this is guaranteed to provide a total ordering, and I've attempted to make 288 it human-friendly, it isn't guaranteed to be consistent between versions. The implementation 289 and its results can change without a minor version increase. 290 */ 291 int opCmp(const URL other) const 292 { 293 return asTuple.opCmp(other.asTuple); 294 } 295 296 private auto asTuple() const nothrow 297 { 298 import std.typecons : tuple; 299 return tuple(host, scheme, port, user, pass, path, query); 300 } 301 302 /// Equality checks. 303 bool opEquals(string other) const 304 { 305 URL o; 306 if (!tryParseURL(other, o)) 307 { 308 return false; 309 } 310 return asTuple() == o.asTuple(); 311 } 312 313 /// Ditto 314 bool opEquals(ref const URL other) const 315 { 316 return asTuple() == other.asTuple(); 317 } 318 319 /// Ditto 320 bool opEquals(const URL other) const 321 { 322 return asTuple() == other.asTuple(); 323 } 324 325 unittest 326 { 327 import std.algorithm, std.array, std.format; 328 assert("http://example.org/some_path".parseURL > "http://example.org/other_path".parseURL); 329 alias sorted = std.algorithm.sort; 330 auto parsedURLs = 331 [ 332 "http://example.org/some_path", 333 "http://example.org:81/other_path", 334 "http://example.org/other_path", 335 "https://example.org/first_path", 336 "http://example.xyz/other_other_path", 337 "http://me:secret@blog.ikeran.org/wp_admin", 338 ].map!(x => x.parseURL).array; 339 auto urls = sorted(parsedURLs).map!(x => x.toHumanReadableString).array; 340 auto expected = 341 [ 342 "http://me:secret@blog.ikeran.org/wp_admin", 343 "http://example.org/other_path", 344 "http://example.org/some_path", 345 "http://example.org:81/other_path", 346 "https://example.org/first_path", 347 "http://example.xyz/other_other_path", 348 ]; 349 assert(cmp(urls, expected) == 0, "expected:\n%s\ngot:\n%s".format(expected, urls)); 350 } 351 352 unittest 353 { 354 auto a = "http://x.org/a?b=c".parseURL; 355 auto b = 
"http://x.org/a?d=e".parseURL; 356 auto c = "http://x.org/a?b=a".parseURL; 357 assert(a < b); 358 assert(c < b); 359 assert(c < a); 360 } 361 362 /** 363 * The append operator (~). 364 * 365 * The append operator for URLs returns a new URL with the given string appended as a path 366 * element to the URL's path. It only adds new path elements (or sequences of path elements). 367 * 368 * Don't worry about path separators; whether you include them or not, it will just work. 369 * 370 * Query elements are copied. 371 * 372 * Examples: 373 * --- 374 * auto random = "http://testdata.org/random".parseURL; 375 * auto randInt = random ~ "int"; 376 * writeln(randInt); // prints "http://testdata.org/random/int" 377 * --- 378 */ 379 URL opBinary(string op : "~")(string subsequentPath) { 380 URL other = this; 381 other ~= subsequentPath; 382 return other; 383 } 384 385 /** 386 * The append-in-place operator (~=). 387 * 388 * The append operator for URLs adds a path element to this URL. It only adds new path elements 389 * (or sequences of path elements). 390 * 391 * Don't worry about path separators; whether you include them or not, it will just work. 392 * 393 * Examples: 394 * --- 395 * auto random = "http://testdata.org/random".parseURL; 396 * random ~= "int"; 397 * writeln(random); // prints "http://testdata.org/random/int" 398 * --- 399 */ 400 URL opOpAssign(string op : "~")(string subsequentPath) { 401 if (path.endsWith("/")) { 402 if (subsequentPath.startsWith("/")) { 403 path ~= subsequentPath[1..$]; 404 } else { 405 path ~= subsequentPath; 406 } 407 } else { 408 if (!subsequentPath.startsWith("/")) { 409 path ~= '/'; 410 } 411 path ~= subsequentPath; 412 } 413 return this; 414 } 415 416 /** 417 * Convert a relative URL to an absolute URL. 418 * 419 * This is designed so that you can scrape a webpage and quickly convert links within the 420 * page to URLs you can actually work with, but you're clever; I'm sure you'll find more uses 421 * for it. 
422 * 423 * It's biased toward HTTP family URLs; as one quirk, "//" is interpreted as "same scheme, 424 * different everything else", which might not be desirable for all schemes. 425 * 426 * This only handles URLs, not URIs; if you pass in 'mailto:bob.dobbs@subgenius.org', for 427 * instance, this will give you our best attempt to parse it as a URL. 428 * 429 * Examples: 430 * --- 431 * auto base = "https://example.org/passworddb?secure=false".parseURL; 432 * 433 * // Download https://example.org/passworddb/by-username/dhasenan 434 * download(base.resolve("by-username/dhasenan")); 435 * 436 * // Download https://example.org/static/style.css 437 * download(base.resolve("/static/style.css")); 438 * 439 * // Download https://cdn.example.net/jquery.js 440 * download(base.resolve("https://cdn.example.net/jquery.js")); 441 * --- 442 */ 443 URL resolve(string other) 444 { 445 if (other.length == 0) return this; 446 if (other[0] == '/') 447 { 448 if (other.length > 1 && other[1] == '/') 449 { 450 // Uncommon syntax: a link like "//wikimedia.org" means "same scheme, switch URL" 451 return parseURL(this.scheme ~ ':' ~ other); 452 } 453 } 454 else 455 { 456 auto schemeSep = other.indexOf("://"); 457 if (schemeSep >= 0 && schemeSep < other.indexOf("/")) 458 // separate URL 459 { 460 return other.parseURL; 461 } 462 } 463 464 URL ret = this; 465 ret.path = ""; 466 if (other[0] != '/') 467 { 468 // relative to something 469 if (!this.path.length) 470 { 471 // nothing to be relative to 472 other = "/" ~ other; 473 } 474 else if (this.path[$-1] == '/') 475 { 476 // directory-style path for the current thing 477 // resolve relative to this directory 478 other = this.path ~ other; 479 } 480 else 481 { 482 // this is a file-like thing 483 // find the 'directory' and relative to that 484 other = this.path[0..this.path.lastIndexOf('/') + 1] ~ other; 485 } 486 } 487 // collapse /foo/../ to / 488 if (other.indexOf("/../") >= 0) 489 { 490 import std.array : Appender, array; 491 import 
std.string : split; 492 import std.algorithm.iteration : joiner, filter; 493 string[] parts = other.split('/'); 494 for (int i = 0; i < parts.length; i++) 495 { 496 if (parts[i] == "..") 497 { 498 for (int j = i - 1; j >= 0; j--) 499 { 500 if (parts[j] != null) 501 { 502 parts[j] = null; 503 parts[i] = null; 504 break; 505 } 506 } 507 } 508 } 509 other = "/" ~ parts.filter!(x => x != null).joiner("/").to!string; 510 } 511 parsePathAndQuery(ret, other); 512 return ret; 513 } 514 515 unittest 516 { 517 auto a = "http://alcyius.com/dndtools/index.html".parseURL; 518 auto b = a.resolve("contacts/index.html"); 519 assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html"); 520 } 521 522 unittest 523 { 524 auto a = "http://alcyius.com/dndtools/index.html?a=b".parseURL; 525 auto b = a.resolve("contacts/index.html?foo=bar"); 526 assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html?foo=bar"); 527 } 528 529 unittest 530 { 531 auto a = "http://alcyius.com/dndtools/index.html".parseURL; 532 auto b = a.resolve("../index.html"); 533 assert(b.toString == "http://alcyius.com/index.html", b.toString); 534 } 535 536 unittest 537 { 538 auto a = "http://alcyius.com/dndtools/foo/bar/index.html".parseURL; 539 auto b = a.resolve("../index.html"); 540 assert(b.toString == "http://alcyius.com/dndtools/foo/index.html", b.toString); 541 } 542 } 543 544 /** 545 * Parse a URL from a string. 546 * 547 * This attempts to parse a wide range of URLs as people might actually type them. Some mistakes 548 * may be made. However, any URL in a correct format will be parsed correctly. 549 */ 550 bool tryParseURL(string value, out URL url) 551 { 552 url = URL.init; 553 // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment] 554 // Scheme is optional in common use. We infer 'http' if it's not given. 555 auto i = value.indexOf("//"); 556 if (i > -1) { 557 if (i > 1) { 558 url.scheme = value[0..i-1]; 559 } 560 value = value[i+2 .. 
$]; 561 } else { 562 url.scheme = "http"; 563 } 564 // Check for an ipv6 hostname. 565 // [user:password@]host[:port]][/]path[?query][#fragment 566 i = value.indexOfAny([':', '/', '[']); 567 if (i == -1) { 568 // Just a hostname. 569 url.host = value.fromPuny; 570 return true; 571 } 572 573 if (value[i] == ':') { 574 // This could be between username and password, or it could be between host and port. 575 auto j = value.indexOfAny(['@', '/']); 576 if (j > -1 && value[j] == '@') { 577 try { 578 url.user = value[0..i].percentDecode; 579 url.pass = value[i+1 .. j].percentDecode; 580 } catch (URLException) { 581 return false; 582 } 583 value = value[j+1 .. $]; 584 } 585 } 586 587 // It's trying to be a host/port, not a user/pass. 588 i = value.indexOfAny([':', '/', '[']); 589 if (i == -1) { 590 url.host = value.fromPuny; 591 return true; 592 } 593 594 // Find the hostname. It's either an ipv6 address (which has special rules) or not (which doesn't 595 // have special rules). -- The main sticking point is that ipv6 addresses have colons, which we 596 // handle specially, and are offset with square brackets. 597 if (value[i] == '[') { 598 auto j = value[i..$].indexOf(']'); 599 if (j < 0) { 600 // unterminated ipv6 addr 601 return false; 602 } 603 // includes square brackets 604 url.host = value[i .. i+j+1]; 605 value = value[i+j+1 .. $]; 606 if (value.length == 0) { 607 // read to end of string; we finished parse 608 return true; 609 } 610 if (value[0] != ':' && value[0] != '?' && value[0] != '/') { 611 return false; 612 } 613 } else { 614 // Normal host. 615 url.host = value[0..i].fromPuny; 616 value = value[i .. $]; 617 } 618 619 if (value[0] == ':') { 620 auto end = value.indexOf('/'); 621 if (end == -1) { 622 end = value.length; 623 } 624 try { 625 url.port = value[1 .. end].to!ushort; 626 } catch (ConvException) { 627 return false; 628 } 629 value = value[end .. 
$]; 630 if (value.length == 0) { 631 return true; 632 } 633 } 634 return parsePathAndQuery(url, value); 635 } 636 637 private bool parsePathAndQuery(ref URL url, string value) 638 { 639 auto i = value.indexOfAny("?#"); 640 if (i == -1) 641 { 642 url.path = value.percentDecode; 643 return true; 644 } 645 646 try 647 { 648 url.path = value[0..i].percentDecode; 649 } 650 catch (URLException) 651 { 652 return false; 653 } 654 655 auto c = value[i]; 656 value = value[i + 1 .. $]; 657 if (c == '?') 658 { 659 i = value.indexOf('#'); 660 string query; 661 if (i < 0) 662 { 663 query = value; 664 value = null; 665 } 666 else 667 { 668 query = value[0..i]; 669 value = value[i + 1 .. $]; 670 } 671 url.query = query; 672 673 foreach(kv; query.split('&')) 674 { 675 auto pos = kv.indexOf('='); 676 if(pos != - 1) 677 { 678 url.queryArr[kv[0 .. pos]] = kv[pos+1 .. $]; 679 } 680 } 681 } 682 683 try 684 { 685 url.fragment = value.percentDecode; 686 } 687 catch (URLException) 688 { 689 return false; 690 } 691 692 return true; 693 } 694 695 unittest { 696 { 697 // Basic. 698 URL url; 699 with (url) { 700 scheme = "https"; 701 host = "example.org"; 702 path = "/foo/bar"; 703 //queryParams.add("hello", "world"); 704 //queryParams.add("gibe", "clay"); 705 fragment = "frag"; 706 } 707 //assert( 708 // Not sure what order it'll come out in. 709 //url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" || 710 //url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag", 711 //url.toString); 712 } 713 { 714 // Percent encoded. 715 URL url; 716 with (url) { 717 scheme = "https"; 718 host = "example.org"; 719 path = "/f☃o"; 720 //queryParams.add("❄", "❀"); 721 //queryParams.add("[", "]"); 722 fragment = "ş"; 723 } 724 //assert( 725 // Not sure what order it'll come out in. 
        //url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
        //url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
        //url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
            url.toString == "https://dhasenan:itsasecret@example.org:17/",
            url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            //queryParams.add("hi", "bye");
        }
        //assert(
        //url.toString == "https://example.org/?hi=bye",
        //url.toString);
    }
}

// A leading "//" (no scheme in front of it) still separates the host from the path.
unittest
{
    auto url = "//foo/bar".parseURL;
    assert(url.host == "foo", "expected host foo, got " ~ url.host);
    assert(url.path == "/bar");
}

// Bracketed IPv6 hosts: the brackets are kept as part of the host.
unittest
{
    // ipv6 hostnames!
    {
        // full range of data
        auto url = parseURL("https://bob:secret@[::1]:2771/foo/bar");
        assert(url.scheme == "https", url.scheme);
        assert(url.user == "bob", url.user);
        assert(url.pass == "secret", url.pass);
        assert(url.host == "[::1]", url.host);
        assert(url.port == 2771, url.port.to!string);
        assert(url.path == "/foo/bar", url.path);
    }

    // minimal
    {
        auto url = parseURL("[::1]");
        assert(url.host == "[::1]", url.host);
    }

    // some random bits
    {
        auto url = parseURL("http://[::1]/foo");
        assert(url.scheme == "http", url.scheme);
        assert(url.host == "[::1]", url.host);
        assert(url.path == "/foo", url.path);
    }

    {
        auto url = parseURL("https://[2001:0db8:0:0:0:0:1428:57ab]/?login=true#justkidding");
        assert(url.scheme == "https");
        assert(url.host == "[2001:0db8:0:0:0:0:1428:57ab]");
        assert(url.path == "/");
        assert(url.fragment == "justkidding");
    }
}

// Appending path elements to a URL that was given as host:port only.
unittest
{
    auto url = "localhost:5984".parseURL;
    auto url2 = url ~ "db1";
    assert(url2.toString == "http://localhost:5984/db1", url2.toString);
    auto url3 = url2 ~ "_all_docs";
    assert(url3.toString == "http://localhost:5984/db1/_all_docs", url3.toString);
}

// NOTE(review): apart from the "Passing an array of query values" case, this block looks like a
// repeat of the unittest further up; most of its assertions are commented out. Confirm whether
// both copies are wanted.
///
unittest {
    {
        // Basic.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            //queryParams.add("hello", "world");
            //queryParams.add("gibe", "clay");
            fragment = "frag";
        }
        //assert(
        // Not sure what order it'll come out in.
        //url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
        //url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
        //url.toString);
    }
    {
        // Passing an array of query values.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            //queryParams.add("hello", "world");
            //queryParams.add("hello", "aether");
            fragment = "frag";
        }
        //assert(
        // Not sure what order it'll come out in.
        //url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag" ||
        //url.toString == "https://example.org/foo/bar?hello=aether&hello=world#frag",
        //url.toString);
    }
    {
        // Percent encoded.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            //queryParams.add("❄", "❀");
            //queryParams.add("[", "]");
            fragment = "ş";
        }
        //assert(
        // Not sure what order it'll come out in.
        //url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
        //url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
        //url.toString);
    }
    {
        // Port, user, pass.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
            url.toString == "https://dhasenan:itsasecret@example.org:17/",
            url.toString);
    }
    {
        // Query with no path.
        URL url;
        with (url) {
            scheme = "https";
            host = "example.org";
            //queryParams.add("hi", "bye");
        }
        //assert(
        //url.toString == "https://example.org/?hi=bye",
        //url.toString);
    }
}

unittest {
    // Percent decoding.

    // http://#:!:@
    auto urlString = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash%EF%BF%BD";
    auto url = urlString.parseURL;
    assert(url.user == "#");
    assert(url.pass == "!:");
    assert(url.host == "example.org");
    assert(url.path == "/{/}");
    //assert(url.queryParams[";"].front == "");
    //assert(url.queryParams["&"].front == "=");
    assert(url.fragment == "#hash�");

    // Round trip.
    assert(urlString == urlString.parseURL.toString, urlString.parseURL.toString);
    assert(urlString == urlString.parseURL.toString.parseURL.toString);
}

// Punycode host names are stored in their Unicode form...
unittest {
    auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseURL;
    assert(url.host == "☂.☃.org", url.host);
}

// ...and converted back to punycode by toString.
unittest {
    auto url = "https://☂.☃.org/?hi=bye".parseURL;
    assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye");
}

///
unittest {
    // There's an existing path.
    auto url = parseURL("http://example.org/foo");
    URL url2;
    // No slash? Assume it needs a slash.
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // With slash? Don't add another.
    url2 = url ~ "/bar";
    assert(url2.toString == "http://example.org/foo/bar", url2.toString);
    url ~= "bar";
    assert(url.toString == "http://example.org/foo/bar");

    // Path already ends with a slash; don't add another.
    url = parseURL("http://example.org/foo/");
    assert((url ~ "bar").toString == "http://example.org/foo/bar");
    // Still don't add one even if you're appending with a slash.
    assert((url ~ "/bar").toString == "http://example.org/foo/bar");
    url ~= "/bar";
    assert(url.toString == "http://example.org/foo/bar");

    // No path.
    url = parseURL("http://example.org");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar");

    // Path is just a slash.
    url = parseURL("http://example.org/");
    assert((url ~ "bar").toString == "http://example.org/bar");
    assert((url ~ "/bar").toString == "http://example.org/bar");
    url ~= "bar";
    assert(url.toString == "http://example.org/bar", url.toString);

    // No path, just fragment.
    url = "ircs://irc.freenode.com/#d".parseURL;
    assert(url.toString == "ircs://irc.freenode.com/#d", url.toString);
}
unittest
{
    // basic resolve()
    {
        auto base = "https://example.org/this/".parseURL;
        assert(base.resolve("that") == "https://example.org/this/that");
        assert(base.resolve("/that") == "https://example.org/that");
        assert(base.resolve("//example.net/that") == "https://example.net/that");
    }

    // ensure we don't preserve query params
    // NOTE(review): every assertion in this case is commented out, so nothing is checked here.
    // Re-enable them once resolve() is confirmed to drop the base URL's query.
    {
        auto base = "https://example.org/this?query=value&other=value2".parseURL;
        //assert(base.resolve("that") == "https://example.org/that");
        //assert(base.resolve("/that") == "https://example.org/that");
        //assert(base.resolve("tother/that") == "https://example.org/tother/that");
        //assert(base.resolve("//example.net/that") == "https://example.net/that");
    }
}


// Compile-time check only: a URL must be usable where std.net.curl.get expects a string
// (via `alias toString this`). No request is made.
unittest
{
    import std.net.curl;
    auto url = "http://example.org".parseURL;
    assert(is(typeof(std.net.curl.get(url))));
}

/**
 * Parse the input string as a URL.
 *
 * This is the throwing counterpart of tryParseURL.
 *
 * Params:
 *     value = the text to parse.
 *
 * Returns: the parsed URL.
 *
 * Throws:
 *     URLException if the string was in an incorrect format.
 */
URL parseURL(string value) {
    URL url;
    if (tryParseURL(value, url)) {
        return url;
    }
    throw new URLException("failed to parse URL " ~ value);
}

///
unittest {
    {
        // Infer scheme
        auto u1 = parseURL("example.org");
        assert(u1.scheme == "http");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 80);
        assert(u1.providedPort == 0);
        assert(u1.fragment == "");
    }
    {
        // Simple host and scheme
        auto u1 = parseURL("https://example.org");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "");
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With path
        auto u1 = parseURL("https://example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 443);
        assert(u1.providedPort == 0);
    }
    {
        // With explicit port
        auto u1 = parseURL("https://example.org:1021/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar", "expected /foo/bar but got " ~ u1.path);
        assert(u1.port == 1021);
        assert(u1.providedPort == 1021);
    }
    {
        // With user
        auto u1 = parseURL("https://bob:secret@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port == 443);
        assert(u1.user == "bob");
        assert(u1.pass == "secret");
    }
    {
        // With user, URL-encoded
        auto u1 = parseURL("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(u1.scheme == "https");
        assert(u1.host == "example.org");
        assert(u1.path == "/foo/bar");
        assert(u1.port ==
443); 1058 assert(u1.user == "bob!"); 1059 assert(u1.pass == "secret!?"); 1060 } 1061 { 1062 // With user and port and path 1063 auto u1 = parseURL("https://bob:secret@example.org:2210/foo/bar"); 1064 assert(u1.scheme == "https"); 1065 assert(u1.host == "example.org"); 1066 assert(u1.path == "/foo/bar"); 1067 assert(u1.port == 2210); 1068 assert(u1.user == "bob"); 1069 assert(u1.pass == "secret"); 1070 assert(u1.fragment == ""); 1071 } 1072 { 1073 // With query string 1074 auto u1 = parseURL("https://example.org/?login=true"); 1075 assert(u1.scheme == "https"); 1076 assert(u1.host == "example.org"); 1077 assert(u1.path == "/", "expected path: / actual path: " ~ u1.path); 1078 //assert(u1.queryParams["login"].front == "true"); 1079 assert(u1.fragment == ""); 1080 } 1081 { 1082 // With query string and fragment 1083 auto u1 = parseURL("https://example.org/?login=true#justkidding"); 1084 assert(u1.scheme == "https"); 1085 assert(u1.host == "example.org"); 1086 assert(u1.path == "/", "expected path: / actual path: " ~ u1.path); 1087 //assert(u1.queryParams["login"].front == "true"); 1088 assert(u1.fragment == "justkidding"); 1089 } 1090 { 1091 // With URL-encoded values 1092 auto u1 = parseURL("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E"); 1093 assert(u1.scheme == "https"); 1094 assert(u1.host == "example.org"); 1095 assert(u1.path == "/☃", "expected path: /☃ actual path: " ~ u1.path); 1096 //assert(u1.queryParams["❄"].front == "="); 1097 assert(u1.fragment == "^"); 1098 } 1099 } 1100 1101 unittest { 1102 assert(parseURL("http://example.org").port == 80); 1103 assert(parseURL("http://example.org:5326").port == 5326); 1104 1105 auto url = parseURL("redis://admin:password@redisbox.local:2201/path?query=value#fragment"); 1106 assert(url.scheme == "redis"); 1107 assert(url.user == "admin"); 1108 assert(url.pass == "password"); 1109 1110 assert(parseURL("example.org").toString == "http://example.org/"); 1111 assert(parseURL("http://example.org:80").toString == 
"http://example.org/"); 1112 1113 assert(parseURL("localhost:8070").toString == "http://localhost:8070/"); 1114 } 1115 1116 /** 1117 * Percent-encode a string. 1118 * 1119 * URL components cannot contain non-ASCII characters, and there are very few characters that are 1120 * safe to include as URL components. Domain names using Unicode values use Punycode. For 1121 * everything else, there is percent encoding. 1122 */ 1123 string percentEncode(string raw) { 1124 // We *must* encode these characters: :/?#[]@!$&'()*+,;=" 1125 // We *can* encode any other characters. 1126 // We *should not* encode alpha, numeric, or -._~. 1127 import std.utf : encode; 1128 import std.array : Appender; 1129 Appender!string app; 1130 foreach (dchar d; raw) { 1131 if (('a' <= d && 'z' >= d) || 1132 ('A' <= d && 'Z' >= d) || 1133 ('0' <= d && '9' >= d) || 1134 d == '-' || d == '.' || d == '_' || d == '~') { 1135 app ~= d; 1136 continue; 1137 } 1138 // Something simple like a space character? Still in 7-bit ASCII? 1139 // Then we get a single-character string out of it and just encode 1140 // that one bit. 1141 // Something not in 7-bit ASCII? Then we percent-encode each octet 1142 // in the UTF-8 encoding (and hope the server understands UTF-8). 1143 char[] c; 1144 encode(c, d); 1145 auto bytes = cast(ubyte[])c; 1146 foreach (b; bytes) { 1147 app ~= format("%%%02X", b); 1148 } 1149 } 1150 return cast(string)app.data; 1151 } 1152 1153 /// 1154 unittest { 1155 assert(percentEncode("IDontNeedNoPercentEncoding") == "IDontNeedNoPercentEncoding"); 1156 assert(percentEncode("~~--..__") == "~~--..__"); 1157 assert(percentEncode("0123456789") == "0123456789"); 1158 1159 string e; 1160 1161 e = percentEncode("☃"); 1162 assert(e == "%E2%98%83", "expected %E2%98%83 but got" ~ e); 1163 } 1164 1165 /** 1166 * Percent-decode a string. 1167 * 1168 * URL components cannot contain non-ASCII characters, and there are very few characters that are 1169 * safe to include as URL components. 
/**
 * Percent-decode a string.
 *
 * The input is decoded with percentDecodeRaw and the resulting octets are then checked to be a
 * valid UTF-8 sequence before being returned as a string.
 *
 * Params:
 *   encoded = the percent-encoded text.
 *
 * Returns: the decoded text.
 *
 * Throws: URLException if the decoded octets are not valid UTF-8. Exceptions raised by
 * percentDecodeRaw for malformed percent sequences propagate unchanged.
 */
string percentDecode(string encoded)
{
    import std.utf : validate, UTFException;

    auto decoded = cast(string) percentDecodeRaw(encoded);
    try
    {
        validate(decoded);
    }
    catch (UTFException)
    {
        // Report the problem in terms of the caller's input rather than the raw octets.
        throw new URLException(
                "The percent-encoded data `" ~ encoded ~ "` does not represent a valid UTF-8 sequence.");
    }
    return decoded;
}

///
unittest
{
    // Input without any percent sequences is returned unchanged.
    foreach (plain; ["IDontNeedNoPercentDecoding", "~~--..__", "0123456789"])
    {
        assert(percentDecode(plain) == plain);
    }

    // Hex digits are accepted in either case.
    foreach (escaped; ["%E2%98%83", "%e2%98%83"])
    {
        immutable decoded = percentDecode(escaped);
        assert(decoded == "☃", "expected a snowman but got" ~ decoded);
    }

    // "%es" is an invalid percent sequence ('s' is not a hex digit) and "%e" is too short.
    foreach (malformed; ["%es", "%e"])
    {
        try
        {
            percentDecode(malformed);
            assert(false, "expected exception not thrown");
        }
        catch (URLException)
        {
        }
    }
}
/**
 * Percent-decode a string into a ubyte array.
 *
 * Every `%XX` sequence is replaced by the octet it denotes (hex digits may be upper or lower
 * case); all other characters are copied through as-is. No validation is performed on the
 * output, so the result is not guaranteed to be valid UTF-8.
 *
 * Params:
 *   encoded = the percent-encoded text.
 *
 * Returns: the decoded octets.
 *
 * Throws: URLException if a `%` is not followed by exactly two hexadecimal digits, including
 * when the input ends right after the `%`.
 */
immutable(ubyte)[] percentDecodeRaw(string encoded)
{
    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    import std.array : Appender;
    Appender!(immutable(ubyte)[]) app;
    for (size_t i = 0; i < encoded.length; i++)
    {
        if (encoded[i] != '%')
        {
            app ~= cast(ubyte) encoded[i];
            continue;
        }
        // Need two more characters after the '%'. Written as a subtraction from the length
        // (i < encoded.length here, so it cannot wrap) instead of `encoded.length - 2`, which
        // underflows for a one-character input such as "%".
        if (encoded.length - i < 3)
        {
            throw new URLException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        if (isHex(encoded[i + 1]) && isHex(encoded[i + 2]))
        {
            auto b = fromHex(encoded[i + 1]);
            auto c = fromHex(encoded[i + 2]);
            app ~= cast(ubyte)((b << 4) | c);
        }
        else
        {
            throw new URLException("Invalid percent encoded value: expected two hex digits after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        // Skip the two hex digits that were just consumed.
        i += 2;
    }
    return app.data;
}

/// True if `c` is an ASCII hexadecimal digit (0-9, a-f or A-F).
private bool isHex(char c)
{
    return ('0' <= c && '9' >= c) ||
        ('a' <= c && 'f' >= c) ||
        ('A' <= c && 'F' >= c);
}

/// Numeric value of the hexadecimal digit `s`. Callers check the digit with isHex first.
private ubyte fromHex(char s)
{
    enum caseDiff = 'a' - 'A';
    if (s >= 'a' && s <= 'z')
    {
        // Fold lower case to upper case so a single lookup table suffices.
        s -= caseDiff;
    }
    return cast(ubyte)("0123456789ABCDEF".indexOf(s));
}

/**
 * Convert a host name that may contain non-ASCII characters to its Punycode form.
 *
 * Bracketed (IPv6) hosts and empty hosts are returned unchanged. A pure-ASCII host is returned
 * unchanged after its characters have been checked. Otherwise each dot-separated label is
 * passed through punyEncode.
 *
 * Throws: URLException if a disallowed ASCII character is met before the first character
 * above U+0080.
 */
private string toPuny(string unicodeHostname)
{
    if (unicodeHostname.length == 0)
    {
        // Nothing to check or encode; also avoids indexing an empty string below.
        return unicodeHostname;
    }
    if (unicodeHostname[0] == '[')
    {
        // It's an ipv6 name.
        return unicodeHostname;
    }
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname)
    {
        auto c = cast(uint) d;
        if (c > 0x80)
        {
            mustEncode = true;
            break;
        }
        // Rejected: everything below ',', the run ':' .. '@' (0x3A-0x40), the run '[' .. '`'
        // (0x5B-0x60) and everything from '{' upwards (which also covers U+0080 itself).
        if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B))
        {
            throw new URLException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode)
    {
        return unicodeHostname;
    }
    import std.algorithm.iteration : map;
    return unicodeHostname.split('.').map!punyEncode.join(".");
}

/// Decode every dot-separated label of `hostname` with punyDecode and join them again.
private string fromPuny(string hostname)
{
    import std.algorithm.iteration : map;
    return hostname.split('.').map!punyDecode.join(".");
}

private
{
    // Parameters of the Punycode algorithm.
    enum delimiter = '-';
    enum marker = "xn--";
    enum ulong damp = 700;
    enum ulong tmin = 1;
    enum ulong tmax = 26;
    enum ulong skew = 38;
    enum ulong base = 36;
    enum ulong initialBias = 72;
    enum dchar initialN = cast(dchar)128;

    /**
     * Punycode bias adaptation (the adaptation function of RFC 3492, section 6.1).
     *
     * Params:
     *   delta     = the delta that was just encoded or decoded.
     *   numPoints = number of code points handled so far; must not be zero.
     *   firstTime = true for the first delta of a label, which is damped more strongly.
     *
     * Returns: the bias to use for the next delta.
     */
    ulong adapt(ulong delta, ulong numPoints, bool firstTime)
    {
        if (firstTime)
        {
            delta /= damp;
        }
        else
        {
            delta /= 2;
        }
        delta += delta / numPoints;
        ulong k = 0;
        while (delta > ((base - tmin) * tmax) / 2)
        {
            delta /= (base - tmin);
            k += base;
        }
        return k + (((base - tmin + 1) * delta) / (delta + skew));
    }
}
/**
 * Encode the input string using the Punycode algorithm.
 *
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
 * in Punycode, you will get "xn--m3h.xn--n3h.com".
 *
 * In order to puny-encode a domain name, you must split it into its components. The following will
 * typically suffice:
 * ---
 * auto domain = "☂.☃.com";
 * auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment.
 *
 * Returns: `input` itself when it contains only basic (below U+0080) code points, otherwise
 * the "xn--"-prefixed encoded segment.
 *
 * Throws: URLException if the running delta exceeds `uint.max`, or if no further code point
 * to process can be found.
 */
string punyEncode(string input)
{
    import std.array : Appender;
    ulong delta = 0;
    dchar n = initialN;
    auto bias = initialBias;
    Appender!string output;
    output ~= marker;
    auto pushed = 0;
    auto codePoints = 0;
    // Copy the basic code points first. Basic means strictly below initialN (U+0080): U+0080
    // itself is the first code point that has to go through the encoder.
    foreach (dchar c; input)
    {
        codePoints++;
        if (c < initialN)
        {
            output ~= c;
            pushed++;
        }
    }
    if (pushed == codePoints)
    {
        // No encoding to do.
        return input;
    }
    if (pushed > 0)
    {
        // Separates the literal prefix from the encoded deltas.
        output ~= delimiter;
    }
    bool first = true;
    while (pushed < codePoints)
    {
        // Smallest code point not yet handled.
        auto best = dchar.max;
        foreach (dchar c; input)
        {
            if (n <= c && c < best)
            {
                best = c;
            }
        }
        if (best == dchar.max)
        {
            throw new URLException("failed to find a new codepoint to process during punyencode");
        }
        // Widen before multiplying so the product cannot wrap in 32 bits before the check.
        delta += cast(ulong)(best - n) * (pushed + 1);
        if (delta > uint.max)
        {
            // TODO better error message
            throw new URLException("overflow during punyencode");
        }
        n = best;
        foreach (dchar c; input)
        {
            if (c < n)
            {
                delta++;
            }
            if (c == n)
            {
                // Emit delta as a variable-length integer in base 36.
                ulong q = delta;
                auto k = base;
                while (true)
                {
                    ulong t;
                    if (k <= bias)
                    {
                        t = tmin;
                    }
                    else if (k >= bias + tmax)
                    {
                        t = tmax;
                    }
                    else
                    {
                        t = k - bias;
                    }
                    if (q < t)
                    {
                        break;
                    }
                    output ~= digitToBasic(t + ((q - t) % (base - t)));
                    q = (q - t) / (base - t);
                    k += base;
                }
                output ~= digitToBasic(q);
                pushed++;
                bias = adapt(delta, pushed, first);
                first = false;
                delta = 0;
            }
        }
        delta++;
        n++;
    }
    return output.data;
}
/**
 * Decode the input string using the Punycode algorithm.
 *
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
 * in Punycode, you will get "xn--m3h.xn--n3h.com".
 *
 * In order to puny-decode a domain name, you must split it into its components. The following will
 * typically suffice:
 * ---
 * auto domain = "xn--m3h.xn--n3h.com";
 * auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment.
 *
 * Returns: `input` itself when it does not start with "xn--", otherwise the decoded segment.
 *
 * Throws: URLException if the encoded part ends in the middle of a code point, contains a
 * character that is not a Punycode digit, or describes a value that overflows.
 */
string punyDecode(string input)
{
    if (!input.startsWith(marker))
    {
        return input;
    }
    input = input[marker.length .. $];

    // let n = initial_n
    dchar n = initialN;

    // let i = 0
    // let bias = initial_bias
    // let output = an empty string indexed from 0
    size_t i = 0;
    auto bias = initialBias;
    dchar[] output;
    // This reserves a bit more than necessary, but it should be more efficient overall than
    // repeatedly growing the array while appending and inserting.
    output.reserve(input.length);

    // consume all code points before the last delimiter (if there is one)
    // and copy them to output
    // if more than zero code points were consumed then consume one more
    // (which will be the last delimiter)
    auto end = input.lastIndexOf(delimiter);
    if (end > -1)
    {
        foreach (dchar c; input[0 .. end])
        {
            output ~= c;
        }
        input = input[end + 1 .. $];
    }

    // while the input is not exhausted do begin
    size_t pos = 0;
    while (pos < input.length)
    {
        // let oldi = i
        // let w = 1
        auto oldi = i;
        ulong w = 1;
        // for k = base to infinity in steps of base do begin
        for (ulong k = base; k < uint.max; k += base)
        {
            // consume a code point, or fail if there was none to consume
            if (pos >= input.length)
            {
                throw new URLException(
                        "invalid punycode: input ends in the middle of an encoded code point");
            }
            // Note that the input is all ASCII, so we can simply index the input string bytewise.
            auto c = input[pos];
            pos++;
            // let digit = the code point's digit-value, fail if it has none
            auto digit = basicToDigit(c);
            if (digit >= base)
            {
                throw new URLException(
                        format("invalid punycode: '%s' is not a valid digit", c));
            }
            // let i = i + digit * w, fail on overflow
            // (i never exceeds uint.max because of this check, so the subtraction cannot wrap)
            if (digit > (uint.max - i) / w)
            {
                throw new URLException("overflow during punydecode");
            }
            i += digit * w;
            // let t = tmin if k <= bias {+ tmin}, or
            //     tmax if k >= bias + tmax, or k - bias otherwise
            ulong t;
            if (k <= bias)
            {
                t = tmin;
            }
            else if (k >= bias + tmax)
            {
                t = tmax;
            }
            else
            {
                t = k - bias;
            }
            // if digit < t then break
            if (digit < t)
            {
                break;
            }
            // let w = w * (base - t), fail on overflow
            if (w > uint.max / (base - t))
            {
                throw new URLException("overflow during punydecode");
            }
            w *= (base - t);
            // end
        }
        immutable slots = output.length + 1;
        // let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
        bias = adapt(i - oldi, slots, oldi == 0);
        // let n = n + i div (length(output) + 1), fail on overflow
        immutable ulong next = cast(ulong) n + i / slots;
        if (next > dchar.max)
        {
            throw new URLException("overflow during punydecode");
        }
        n = cast(dchar) next;
        // let i = i mod (length(output) + 1)
        i %= slots;
        // {if n is a basic code point then fail}
        // (We aren't actually going to fail here; it's clear what this means.)
        // insert n into output at position i
        import std.array : insertInPlace;
        (() @trusted { output.insertInPlace(i, cast(dchar)n); })(); // should be @safe but isn't marked
        // increment i
        i++;
        // end
    }
    return output.to!string;
}

// Lifted from punycode.js.
/// Character for a Punycode digit value: 0..25 map to 'a'..'z', 26..35 map to '0'..'9'.
private dchar digitToBasic(ulong digit)
{
    return cast(dchar)(digit + 22 + 75 * (digit < 26));
}

// Lifted from punycode.js.
/**
 * Numeric value of a Punycode digit character.
 *
 * '0'..'9' yield 26..35 and letters of either case yield 0..25. Any other character yields
 * `base`, which is not a valid digit value.
 */
private uint basicToDigit(char c)
{
    immutable uint codePoint = c;
    if ('0' <= codePoint && codePoint <= '9')
    {
        // Decimal digits come after the 26 letters: '0' (48) maps to 26.
        return codePoint - 22;
    }
    if ('A' <= codePoint && codePoint <= 'Z')
    {
        return codePoint - 'A';
    }
    if ('a' <= codePoint && codePoint <= 'z')
    {
        return codePoint - 'a';
    }
    return base;
}

unittest
{
    // Labels mixing ASCII with one or more non-ASCII letters.
    assert(punyEncode("b\u00FCcher") == "xn--bcher-kva");
    assert(punyEncode("b\u00FCc\u00FCher") == "xn--bcher-kvab");

    immutable accented = punyEncode("ýbücher");
    assert(accented == "xn--bcher-kvaf", accented);

    assert(punyEncode("mañana") == "xn--maana-pta");

    // A label without any ASCII code points.
    immutable arabic = punyEncode(
            "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
            ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F");
    assert(arabic == "xn--egbpdaj6bu4bxfgehfvwxn", arabic);
}

unittest
{
    immutable arabic = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
    assert(arabic == "ليهمابتكلموشعربي؟", arabic);

    assert(punyDecode("xn--maana-pta") == "mañana");
}

unittest
{
    import std.string, std.algorithm, std.array, std.range;

    // Whole domain names are handled one dot-separated segment at a time.
    immutable decoded = "xn--m3h.xn--n3h.com".splitter(".").map!(punyDecode).join(".");
    assert(decoded == "☂.☃.com", decoded);

    immutable encoded = "☂.☃.com".splitter(".").map!(punyEncode).join(".");
    assert(encoded == "xn--m3h.xn--n3h.com", encoded);
}